import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")
%matplotlib inline
import math
# Load the activity table and key it by (person_id, act_num).
activities = pd.read_csv('activities_dataframe.csv', index_col=False)
activities = (
    activities
    .set_index(['person_id', 'act_num'])
    .fillna(0)
    # `columns=` replaces the positional axis argument (`drop(label, 1)`),
    # which pandas 2.0 no longer accepts.
    .drop(columns='Unnamed: 0')
)
# fillna(0) above leaves a 0 where a start/end time was missing; wherever that
# reads as the string '0' after the cast, it is rewritten to '00:00:00'.
for time_column in ('act_start_time', 'act_end_time'):
    activities[time_column] = (
        activities[time_column].astype(str).replace('0', '00:00:00')
    )
activities.head()
# Peek at the raw trips table: the ten most frequent planned modes before
# any cleaning is applied.
a = pd.read_csv('trips_dataframe.csv', index_col=False)
raw_mode_counts = a['planned_mode'].value_counts()
raw_mode_counts.nlargest(n=10)
# Load the trips table, key it by (person_id, dest_act_num) and derive the
# columns used by the mode-choice plots further down.
trips = pd.read_csv('trips_dataframe.csv', index_col=False)
trips = trips.set_index(['person_id', 'dest_act_num']).drop(
    columns=['Unnamed: 0', 'cost', 'realized_mode', 'incentive', 'simulation_id']
)
trips['trip_duration'] = trips['trip_end'] - trips['trip_start']
trips = trips.drop(['025900-2014000086688-0-3490800', '025900-2013001118097-0-2601636'])  # two invalid rows
# Both transit access variants are reported as plain "transit"; trips with no
# planned mode are counted as "walk".
trips['planned_mode'] = (
    trips['planned_mode']
    .replace({'walk_transit': 'transit', 'drive_transit': 'transit'})
    .fillna('walk')
)
# Departure hour (assumes trip_start is in seconds -- TODO confirm).
# Floor division rather than rounding: a 05:40 departure belongs to hour 5,
# whereas rounding would label it hour 6 and shift it into the wrong period.
trips['depart_hour'] = trips['trip_start'] // 3600
# Distance bucket: distance / 1000, rounded to the nearest whole number
# (presumably metres -> km, per the column name).
trips['dist_interval(km)'] = (trips['distance'] / 1000).round(0)
trips.sample(n=5)
trips['trip_duration'].describe()
trips['planned_mode'].value_counts().nlargest(n=10)
# Attach to every trip the activity whose (person_id, act_num) equals the
# trip's (person_id, dest_act_num) index; the activity time strings are not
# needed afterwards.
trips_with_activity = trips.merge(
    activities,
    left_index=True,
    right_on=['person_id', 'act_num'],
)
df = trips_with_activity.drop(columns=['act_start_time', 'act_end_time'])
df.head()
merged_mode_counts = df['planned_mode'].value_counts()
merged_mode_counts.nlargest(n=10)

# Trip counts per planned mode, first by departure hour, then by activity type.
for category in ('depart_hour', 'act_type'):
    plt.figure(figsize=(15, 10))
    sns.countplot(x=category, hue='planned_mode', data=df)

# One panel per departure hour; the font is enlarged only for this large grid
# and restored right after.
sns.set(font_scale=3)
sns.catplot(
    x='act_type',
    hue='planned_mode',
    col='depart_hour',
    kind='count',
    data=df,
    height=10,
    aspect=2,
    col_wrap=2,
)
sns.set(font_scale=1)

# Trip counts per planned mode by distance bucket.
plt.figure(figsize=(15, 10))
sns.countplot(x='dist_interval(km)', hue='planned_mode', data=df)
===================================================================================================================
# Network table; only the link id and its fromLocation coordinates are needed
# to place per-link demand on a map.
network = pd.read_csv('network.csv')
link_columns = ['linkId', 'fromLocationX', 'fromLocationY']
network_link = network[link_columns]
# convert helper function to produce the link df for graphing
def convert(df, links=None):
    """Count rows per network link and attach each link's columns.

    Parameters
    ----------
    df : DataFrame
        Must contain an 'act_nearest_link' column; each row counts once
        towards the link it names.
    links : DataFrame, optional
        Table with a 'linkId' column plus the columns to attach to each
        counted link. Defaults to the module-level ``network_link``.

    Returns
    -------
    DataFrame
        Indexed by 'linkId', ordered by descending count, with a 'size'
        column (number of rows of ``df`` on that link) followed by the
        remaining columns of ``links``.
    """
    if links is None:
        links = network_link
    # rename_axis + reset_index(name=...) fixes the column names explicitly.
    # The default names produced by value_counts()/reset_index() differ
    # between pandas 1.x and 2.x, so renaming them afterwards is fragile.
    counts = (
        df['act_nearest_link']
        .value_counts()
        .rename_axis('linkId')
        .reset_index(name='size')
    )
    return counts.join(links.set_index('linkId'), on='linkId').set_index('linkId')
df.head()
# The demand maps only need the nearest link, the departure hour (for the
# time periods) and the planned mode. Flatten the (person_id, act_num) index
# into columns and discard it together with every other column.
mode_to_link = (
    df[['act_nearest_link', 'depart_hour', 'planned_mode']]
    .reset_index()
    .drop(columns=['person_id', 'act_num'])
)
mode_to_link.head()
# A link is not tied to a single mode, so demand is mapped separately for each
# mode and, within a mode, for each time period.
# As before, the time period is determined by the departure hour.
car = mode_to_link[mode_to_link['planned_mode'] == 'car']
transit = mode_to_link[mode_to_link['planned_mode'] == 'transit']
walk = mode_to_link[mode_to_link['planned_mode'] == 'walk']
ride_hail = mode_to_link[mode_to_link['planned_mode'] == 'ride_hail']


def _demand_in_period(mode_trips, first_hour, end_hour=None):
    """Per-link demand for trips with first_hour <= depart_hour < end_hour.

    With ``end_hour`` omitted the period has no upper bound.
    """
    hours = mode_trips['depart_hour']
    in_period = hours >= first_hour
    if end_hour is not None:
        in_period = in_period & (hours < end_hour)
    return convert(mode_trips[in_period])


# Periods: early morning [0, 6), AM peak [6, 9), mid day [9, 16),
# PM peak [16, 19), night [19, ...).
car_early_morning_mode = _demand_in_period(car, 0, 6)
car_am_peak_mode = _demand_in_period(car, 6, 9)
car_mid_day_mode = _demand_in_period(car, 9, 16)
car_pm_peak_mode = _demand_in_period(car, 16, 19)
car_night_mode = _demand_in_period(car, 19)

transit_early_morning_mode = _demand_in_period(transit, 0, 6)
transit_am_peak_mode = _demand_in_period(transit, 6, 9)
transit_mid_day_mode = _demand_in_period(transit, 9, 16)
transit_pm_peak_mode = _demand_in_period(transit, 16, 19)
transit_night_mode = _demand_in_period(transit, 19)

walk_early_morning_mode = _demand_in_period(walk, 0, 6)
walk_am_peak_mode = _demand_in_period(walk, 6, 9)
walk_mid_day_mode = _demand_in_period(walk, 9, 16)
walk_pm_peak_mode = _demand_in_period(walk, 16, 19)
walk_night_mode = _demand_in_period(walk, 19)

ride_hail_early_morning_mode = _demand_in_period(ride_hail, 0, 6)
ride_hail_am_peak_mode = _demand_in_period(ride_hail, 6, 9)
ride_hail_mid_day_mode = _demand_in_period(ride_hail, 9, 16)
ride_hail_pm_peak_mode = _demand_in_period(ride_hail, 16, 19)
ride_hail_night_mode = _demand_in_period(ride_hail, 19)

car_early_morning_mode.head()
# Now this df contains the info we need to plot:
#   - the size of the circle for the demand
#   - the location x, y
# Ideally, I would prefer to see the four modes on the same plot for each time
# period, which could give a better visual comparison of those demands.
# The network dots below are drawn as an extra layer for geographic context.
location_columns = ['attributeOrigType', 'fromLocationX', 'fromLocationY']
network_loc = network[location_columns]
# Get rid of some outlier points for a better visual: keep only points with
# fromLocationX below 560000 and fromLocationY above 4170000.
keep_point = (network_loc['fromLocationX'] < 560000) & (network_loc['fromLocationY'] > 4170000)
network_loc = network_loc[keep_point]
===================================================================================================================
car_early_morning_mode.sort_values(by=['size'], ascending=False).head()

# CAR demand maps: one figure per time period, built by the same two-layer
# recipe instead of five copy-pasted stanzas.
car_demand_by_period = [
    ('Early morning mode split by: CAR', car_early_morning_mode),
    ('AM Peak mode split by: CAR', car_am_peak_mode),
    ('Mid Day mode split by: CAR', car_mid_day_mode),
    ('PM Peak mode split by: CAR', car_pm_peak_mode),
    ('Night time mode split by: CAR', car_night_mode),
]
for title, demand in car_demand_by_period:
    fig, ax = plt.subplots(figsize=(15, 15))
    # Background layer: every network point as a small black dot.
    # A fixed marker area is set with matplotlib's `s`; seaborn's `size` is a
    # semantic (data-mapped) variable, so a scalar there does not set the dot
    # size and only adds a stray legend entry.
    sns.scatterplot(x='fromLocationX', y='fromLocationY',
                    data=network_loc,
                    s=5,
                    color='black',
                    legend=False,
                    ax=ax)
    # Demand layer: one circle per link, scaled by the link's 'size' count.
    sns.scatterplot(x='fromLocationX', y='fromLocationY',
                    data=demand,
                    size='size',
                    sizes=(1, 300),
                    color='yellow',
                    ax=ax).set_title(title)
===================================================================================================================
# TRANSIT demand maps: one figure per time period.
# The first figure now shows the early-morning frame; it used to be a second
# copy of the AM Peak figure, leaving transit_early_morning_mode unplotted.
transit_demand_by_period = [
    ('Early Morning mode split by: TRANSIT', transit_early_morning_mode),
    ('AM Peak mode split by: TRANSIT', transit_am_peak_mode),
    ('Mid Day mode split by: TRANSIT', transit_mid_day_mode),
    ('PM Peak mode split by: TRANSIT', transit_pm_peak_mode),
    ('Night mode split by: TRANSIT', transit_night_mode),
]
for title, demand in transit_demand_by_period:
    fig, ax = plt.subplots(figsize=(15, 15))
    # Background layer: every network point as a small black dot.
    # A fixed marker area is set with matplotlib's `s`; seaborn's `size` is a
    # semantic (data-mapped) variable, so a scalar there does not set the dot
    # size and only adds a stray legend entry.
    sns.scatterplot(x='fromLocationX', y='fromLocationY',
                    data=network_loc,
                    s=5,
                    color='black',
                    legend=False,
                    ax=ax)
    # Demand layer: one circle per link, scaled by the link's 'size' count.
    sns.scatterplot(x='fromLocationX', y='fromLocationY',
                    data=demand,
                    size='size',
                    sizes=(1, 300),
                    color='red',
                    ax=ax).set_title(title)
===================================================================================================================
# WALK demand maps: one figure per time period.
# The mid-day figure is included here; walk_mid_day_mode was computed but
# never plotted, unlike the other three modes.
walk_demand_by_period = [
    ('Early Morning split by: WALK', walk_early_morning_mode),
    ('AM Peak split by: WALK', walk_am_peak_mode),
    ('Mid Day split by: WALK', walk_mid_day_mode),
    ('PM Peak split by: WALK', walk_pm_peak_mode),
    ('Night mode split by: WALK', walk_night_mode),
]
for title, demand in walk_demand_by_period:
    fig, ax = plt.subplots(figsize=(15, 15))
    # Background layer: every network point as a small black dot.
    # A fixed marker area is set with matplotlib's `s`; seaborn's `size` is a
    # semantic (data-mapped) variable, so a scalar there does not set the dot
    # size and only adds a stray legend entry.
    sns.scatterplot(x='fromLocationX', y='fromLocationY',
                    data=network_loc,
                    s=5,
                    color='black',
                    legend=False,
                    ax=ax)
    # Demand layer: one circle per link, scaled by the link's 'size' count.
    sns.scatterplot(x='fromLocationX', y='fromLocationY',
                    data=demand,
                    size='size',
                    sizes=(1, 300),
                    color='blue',
                    ax=ax).set_title(title)
===================================================================================================================
# RIDE HAIL demand maps: one figure per time period, built by the same
# two-layer recipe instead of five copy-pasted stanzas.
ride_hail_demand_by_period = [
    ('Early Morning mode split by: RIDE HAIL', ride_hail_early_morning_mode),
    ('AM Peak mode split by: RIDE HAIL', ride_hail_am_peak_mode),
    ('Mid Day mode split by: RIDE HAIL', ride_hail_mid_day_mode),
    ('PM Peak mode split by: RIDE HAIL', ride_hail_pm_peak_mode),
    ('Night mode split by: RIDE HAIL', ride_hail_night_mode),
]
for title, demand in ride_hail_demand_by_period:
    fig, ax = plt.subplots(figsize=(15, 15))
    # Background layer: every network point as a small black dot.
    # A fixed marker area is set with matplotlib's `s`; seaborn's `size` is a
    # semantic (data-mapped) variable, so a scalar there does not set the dot
    # size and only adds a stray legend entry.
    sns.scatterplot(x='fromLocationX', y='fromLocationY',
                    data=network_loc,
                    s=5,
                    color='black',
                    legend=False,
                    ax=ax)
    # Demand layer: one circle per link, scaled by the link's 'size' count.
    sns.scatterplot(x='fromLocationX', y='fromLocationY',
                    data=demand,
                    size='size',
                    sizes=(1, 300),
                    color='green',
                    ax=ax).set_title(title)